################################################################################
##
## Purpose: This script creates SI Figure 22
##
## Author: James Bisbee (james.h.bisbee@vanderbilt.edu)
##
## Input Files:
##  - ./data/prepped/finalData.RData
##
## Output Files:
##  - ./output/figures/SI_figure_22.pdf
##
##
## See associated log file for compute environment, package versions, 
##  and date of most recent run.
# Start from a clean slate: drop every object in the current environment and
# trigger a garbage collection.
rm(list = ls())
gc()
# Attach packages. NOTE(review): require() only warns and returns FALSE when a
# package is missing, so a missing dependency surfaces later as a
# "could not find function" error rather than at this point.
require(tidyverse)
require(ggridges)
require(ggrepel)
require(marginaleffects)

# Fix the RNG seed so that any random draws are reproducible across runs.
set.seed(123)

# Compute details ----
# Best-effort logging of the machine this run executed on, so that the
# associated log file records the compute environment.
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))

# Sys.info() reports e.g. "Windows", "Linux" or "Darwin"; pull it out once as a
# bare string so the comparisons below are plain scalar tests.
os_name <- Sys.info()[['sysname']]

if (os_name == 'Windows') {
  # Each wmic call prints a header line first: drop it for the RAM listing and
  # take the second line (first data row) for the CPU queries.
  ram_size <- system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name <- system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id <- system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov

  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if (os_name == 'Linux') {
  # List CPU %, memory %, PID and command line of running `rsession` processes.
  # Wrapped in tryCatch because this is diagnostic output only: a missing `ps`
  # binary must not stop the figure from being produced.
  df <- tryCatch({
    ps_lines <- system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE)
    # ps pads its columns with a variable number of spaces, so trim each line
    # and split on runs of whitespace; the first three fields are then always
    # cpu, mem and pid, and everything after them is the command line.
    splitted <- strsplit(trimws(ps_lines), "\\s+")
    # splitted[-1] skips the header row; with no matching process this yields
    # NULL rather than a data frame.
    do.call(rbind, lapply(splitted[-1],
                          function(x) data.frame(
                            cpu = as.numeric(x[1]),
                            mem = as.numeric(x[2]),
                            pid = as.numeric(x[3]),
                            cmd = paste(x[-(1:3)], collapse = " "))))
  }, error = function(e) {
    message("Could not query the compute environment via ps: ",
            conditionMessage(e))
    NULL
  })
  # Print explicitly so the table also appears when the script is source()d.
  print(df)
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

# Load the prepared data. The code below relies on this file providing
# `utterance_level` with (at least) the columns docID, chamber, opensecretsID,
# text and interrupted.
load('./data/prepped/finalData.RData')

# Keyword patterns, matched against the lower-cased utterance text. In order:
# uncertain, delaying and apologetic language.
# NOTE(review): the first pattern previously read "m not(sure|...)" with no
# space after "not", so it could only match "m notsure" and never
# "i'm not sure". The space is restored here; this changes the counts in the
# 'Uncertain Language' panel relative to runs made with the old pattern.
keyword_patterns <- c(
  "(m not (sure|certain|positive|aware)|(do not|don't) know|m unsure)",
  "back to you|follow up with you|look into|(need|have) to (check|confer|look)",
  "(i am sorry|i'm sorry|i apologize|forgive me)"
)

# Pattern-independent preparation, done once instead of once per pattern:
# keep rows whose opensecretsID contains 'FED', attach each ID's total
# utterance count (the denominator for `pct`), and lower-case the text.
# The result is deliberately left grouped by opensecretsID.
fed_utterances <- utterance_level %>%
  select(docID,chamber,opensecretsID,text,interrupted) %>%
  filter(grepl('FED',opensecretsID)) %>%
  group_by(opensecretsID) %>%
  mutate(nTot = n(),
         text_lower = tolower(text))

# For one pattern: count matching utterances per ID and interruption status,
# express them as a share of that ID's total utterances, and tag the rows with
# the pattern that produced them (`grp`).
count_keyword_hits <- function(pattern) {
  fed_utterances %>%
    filter(grepl(pattern,text_lower)) %>%
    count(opensecretsID,interrupted,nTot) %>%
    ungroup() %>%
    mutate(pct = n / nTot,
           # 'FED' is stripped and the remainder title-cased; the levels fix
           # the left-to-right order of the chairs on the x axis.
           opensecretsID = factor(str_to_title(gsub('FED','',opensecretsID)),
                                  levels = c('Greenspan','Bernanke','Yellen','Powell')),
           grp = pattern)
}

# Bind all patterns in a single call. rev() keeps the row order of the earlier
# loop, which prepended each new pattern's rows to the accumulated result.
toplot <- bind_rows(lapply(rev(keyword_patterns), count_keyword_hits))

# Within each chair-by-pattern cell, `lab` is the row's share of the cell's
# summed `pct`, i.e. how the keyword utterances split by interruption status.
# `grp` still holds the raw regex at this point, so it is a valid grouping key.
with_share <- toplot %>%
  group_by(opensecretsID,grp) %>%
  mutate(lab = pct / sum(pct)) %>%
  ungroup()

# Swap the raw regex in `grp` for a readable facet title, recognising each
# pattern by a word it contains; anything else is the delaying pattern.
toplot <- with_share %>%
  mutate(grp = case_when(
    grepl('sorry',grp) ~ 'Apologetic Language',
    grepl('unsure',grp) ~ 'Uncertain Language',
    TRUE ~ 'Delaying Language'
  ))

# SI Figure 22 ----
# Stacked bars of keyword-utterance shares per Fed chair, one facet per
# keyword group, filled by interruption status.
# The plot object is built before the PDF device is opened, so an error while
# constructing it cannot leave a half-written file or an open device behind.
si_figure_22 <- toplot %>%
  ggplot(aes(x = opensecretsID,y = pct,fill = factor(interrupted))) + 
  geom_bar(stat = 'identity',color = 'grey70',linewidth = .2) + 
  scale_y_continuous(labels = scales::percent) + 
  # NOTE(review): the fill values and labels are matched by position to the
  # sorted levels of factor(interrupted); this assumes interrupted is coded
  # 0/1 -- confirm against the data.
  scale_fill_manual(values = c('white','grey30'),labels = c('Not interrupted','Interrupted')) + 
  labs(x = 'Fed Chair',
       y = 'Proportion of total utterances',
       fill = 'Utterance',
       title = 'Uncertainty and Obsequiousness',
       subtitle = 'Proportions of utterances with keywords, of which interrupted in gray') + 
  theme_bw() + 
  # Annotate only the interrupted rows with `lab` (their share of the
  # chair-by-group total), shown as a percentage just above y = pct.
  geom_text(data = toplot %>%
              filter(interrupted == 1),
            aes(label = paste0(round(lab*100,1),'%')),
            vjust = -.2) + 
  facet_wrap(~grp) + 
  theme(legend.position = 'bottom',
        axis.text.x = element_text(size = 8))

pdf('./output/figures/SI_figure_22.pdf',width = 8,height = 4)
# Explicit print(): ggplot objects are only drawn when printed, and top-level
# auto-printing does not happen when the script is run through source().
print(si_figure_22)
dev.off()

# EOF